# coding: utf-8
from bs4 import BeautifulSoup
import requests
import re

title = []  # link text of every chapter anchor on the index page
href = []  # href attribute of every chapter anchor, parallel to `title`
url = 'http://www.37zw.net/1/1429/'
# A timeout keeps the script from hanging forever on an unresponsive server.
res = requests.get(url, timeout=10)
# Stop on an HTTP error status instead of parsing an error page as the index.
res.raise_for_status()
res.encoding = 'gbk'  # decode the response body as GBK when reading res.text
soup = BeautifulSoup(res.text, 'html.parser')
# The chapter anchors are the <a> children of <dd> inside <div id="list">.
chapter_list = soup.find('div', id='list')
if chapter_list is None:
    # find() returns None when the element is absent; exit with a clear
    # message rather than an AttributeError on None.
    raise SystemExit('chapter list (div#list) not found at ' + url)
href_name = chapter_list.select('dd > a')


# Record the text and the href of every anchor found on the index page,
# keeping both lists in the same order as the anchors.
title.extend([anchor.text for anchor in href_name])
href.extend([anchor['href'] for anchor in href_name])

# Patterns used to strip markup from the serialized #content element.
# Raw strings avoid invalid-escape warnings for `\s`; compiling here keeps
# the work out of the per-chapter loop.
script_tag = re.compile(r'<\s*script[^>]*>[^<]*<\s*/\s*script\s*>')
div_open_tag = re.compile(r'<div\s\S*>')

# Download every chapter in index order and append it to the output file.
# Chapters are numbered from 1 so the progress and failure messages agree,
# and iterating over `href` directly never indexes past its end.
for chapter_no, chapter_href in enumerate(href, start=1):
    try:
        print("爬取第" + str(chapter_no) + "章中……")
        new_url = url + chapter_href
        # Bound the wait so one stalled request cannot block the whole run;
        # a timeout surfaces as a requests exception (an IOError subclass)
        # and is reported by the handler below.
        res_new = requests.get(new_url, timeout=10)
        res_new.encoding = 'gbk'
        chapter_soup = BeautifulSoup(res_new.text, 'html.parser')
        # [0] raises IndexError when the page has no <h1> or no #content.
        h1_title = chapter_soup.select('h1')[0].text
        content = chapter_soup.select('#content')[0]

        cont = str(content)
        cont = script_tag.sub('', cont)  # drop inline <script> blocks
        cont = cont.replace('</div>', '')  # drop closing div tags
        cont = div_open_tag.sub('', cont)  # drop opening div tags
        cont = cont.replace('<br/>', '\n')  # turn line-break tags into newlines

        # Append mode: chapters already written survive a later failure.
        with open('D:/凡人修仙传.txt', 'a', encoding='utf8') as f:
            f.write(h1_title)
            f.write(cont)
            print("success")
    except (EnvironmentError, IndexError):
        # Network/file errors and pages missing the expected elements skip
        # this chapter only; the loop continues with the next one.
        print("Sorry， 爬取第" + str(chapter_no) + "章失败")
